{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pqMl9Ouxf6yn"
   },
   "source": [
    "# Data drift dashboard in jupyter notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    import evidently\n",
    "except:\n",
    "    !pip install git+https://github.com/evidentlyai/evidently.git"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8FP6JHGUf6ys"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import requests\n",
    "import zipfile\n",
    "import io\n",
    "\n",
    "from datetime import datetime, time\n",
    "from sklearn import datasets, ensemble\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "from evidently.report import Report\n",
    "from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, RegressionPreset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "x3OWHRQ0f6yv"
   },
   "source": [
    "## Bicycle Demand Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Yx-GhFajUgNA"
   },
   "source": [
    "This step automatically downloads the bike dataset from UCI. This version is slightly different from the dataset used in Kaggle competition. If you want the example to be identical to the one in the Evidently blog \"How to break a model in 20 days\", you can manually download the dataset from Kaggle: https://www.kaggle.com/c/bike-sharing-demand/data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "uMLMel0Cf6yw"
   },
   "outputs": [],
   "source": [
    "content = requests.get(\"https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\", verify=False).content\n",
    "with zipfile.ZipFile(io.BytesIO(content)) as arc:\n",
    "    raw_data = pd.read_csv(arc.open(\"hour.csv\"), header=0, sep=',', parse_dates=['dteday'], index_col='dteday')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "pNwLFltuf6yx"
   },
   "outputs": [],
   "source": [
    "raw_data.index = raw_data.apply(\n",
    "    lambda row: datetime.combine(row.name, time(hour=int(row['hr']))), axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4VypH7uAf6yz"
   },
   "source": [
    "## Regression Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9YMdccRpf6y3"
   },
   "source": [
    "### Model training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zbp6euUtf6y3"
   },
   "outputs": [],
   "source": [
    "target = 'cnt'\n",
    "prediction = 'prediction'\n",
    "numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'hr', 'weekday']\n",
    "categorical_features = ['season', 'holiday', 'workingday']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "T8jbMDVwf6y4"
   },
   "outputs": [],
   "source": [
    "reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00']\n",
    "current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SQLQTJy7f6y4",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "reference.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "im2Bqd3zf6y5"
   },
   "outputs": [],
   "source": [
    "regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bt_5Kfu-f6y5"
   },
   "outputs": [],
   "source": [
    "regressor.fit(reference[numerical_features + categorical_features], reference[target])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-7b4UNq2f6y6"
   },
   "outputs": [],
   "source": [
    "ref_prediction = regressor.predict(reference[numerical_features + categorical_features])\n",
    "current_prediction = regressor.predict(current[numerical_features + categorical_features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "tt79FK5Mf6y6"
   },
   "outputs": [],
   "source": [
    "reference['prediction'] = ref_prediction\n",
    "current['prediction'] = current_prediction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4i2oy0k1f6y7"
   },
   "source": [
    "### Model Performance "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Rg0JipUif6y7"
   },
   "outputs": [],
   "source": [
    "column_mapping = ColumnMapping()\n",
    "\n",
    "column_mapping.target = target\n",
    "column_mapping.prediction = prediction\n",
    "column_mapping.numerical_features = numerical_features\n",
    "column_mapping.categorical_features = categorical_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fFGbeOJjf6y8"
   },
   "outputs": [],
   "source": [
    "regression_performance = Report(metrics=[RegressionPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "regression_performance.run(current_data=reference, reference_data=None, column_mapping=column_mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "KSon-d2pf6y8",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "regression_performance.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "uVckFZGmf6y8"
   },
   "outputs": [],
   "source": [
    "#regression_performance.save('reports/regression_performance_at_training.html')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "62qRRKmhf6y9"
   },
   "source": [
    "##  Week 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6G7KzxSxf6y9",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "regression_performance = Report(metrics=[RegressionPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "regression_performance.run(current_data=current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
    "                          reference_data=reference,\n",
    "                          column_mapping=column_mapping)\n",
    "\n",
    "regression_performance.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "z9C2uXsAf6y-",
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "#regression_performance.save('reports/regression_performance_after_week1.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "afFgUlwvf6y-",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "target_drift = Report(metrics=[TargetDriftPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "target_drift.run(current_data=current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],\n",
    "                 reference_data=reference,\n",
    "                 column_mapping=column_mapping)\n",
    "\n",
    "target_drift.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5AyuTggif6y_"
   },
   "outputs": [],
   "source": [
    "#target_drift.save('reports/target_drift_after_week1.html')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "xnrzxRRnf6y_"
   },
   "source": [
    "## Week 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "gJgKaVAmf6zA",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "regression_performance = Report(metrics=[RegressionPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "regression_performance.run(current_data=current.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], \n",
    "                          reference_data=reference,\n",
    "                          column_mapping=column_mapping)\n",
    "\n",
    "regression_performance.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "46BC9kmHf6zA"
   },
   "outputs": [],
   "source": [
    "#regression_performance.save('reports/regression_performance_after_week2.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ekzNzVCLf6zB",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "target_drift = Report(metrics=[TargetDriftPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "target_drift.run(current_data=current.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'],\n",
    "                 reference_data=reference,\n",
    "                 column_mapping=column_mapping)\n",
    "\n",
    "target_drift.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "MDgINUcff6zC"
   },
   "outputs": [],
   "source": [
    "#target_drift.save('reports/target_drift_after_week2.html')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BAXuFSrTf6zC"
   },
   "source": [
    "## Week 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SQh4cHS2f6zC"
   },
   "outputs": [],
   "source": [
    "regression_performance = Report(metrics=[RegressionPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "regression_performance.run(current_data=current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'], \n",
    "                          reference_data=reference,\n",
    "                          column_mapping=column_mapping)\n",
    "\n",
    "regression_performance.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "f4zo4najf6zD"
   },
   "outputs": [],
   "source": [
    "#regression_performance.save('reports/regression_performance_after_week3.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SI-ojL1Ff6zD"
   },
   "outputs": [],
   "source": [
    "target_drift = Report(metrics=[TargetDriftPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "target_drift.run(current_data=current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'],\n",
    "                 reference_data=reference,\n",
    "                 column_mapping=column_mapping)\n",
    "\n",
    "target_drift.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "J7LuOx-3f6zD"
   },
   "outputs": [],
   "source": [
    "#target_drift.save('reports/target_drift_after_week3.html')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "imlXej4of6zE"
   },
   "source": [
    "## Data Drift"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "x-CrzsxSf6zE"
   },
   "outputs": [],
   "source": [
    "column_mapping = ColumnMapping()\n",
    "\n",
    "column_mapping.numerical_features = numerical_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cKdcopYPf6zE"
   },
   "outputs": [],
   "source": [
    "data_drift = Report(metrics = [DataDriftPreset()], options={\"render\": {\"raw_data\": True}})\n",
    "data_drift.run(current_data = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],\n",
    "               reference_data = reference,\n",
    "               column_mapping=column_mapping)\n",
    "\n",
    "data_drift.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Xy7Y6S8Af6zF"
   },
   "outputs": [],
   "source": [
    "#data_drift.save(\"reports/data_drift_dashboard_after_week1.html\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Support Evidently\n",
    "Enjoyed the tutorial? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [
    "xnrzxRRnf6y_",
    "BAXuFSrTf6zC"
   ],
   "name": "bicycle_demand_monitoring.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
